import re
from urllib import request
from urllib.parse import urljoin

import chardet

# Request headers sent with the page request. Keys and values must both be
# quoted strings, e.g. headers = {"key": "value"}.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62'}

rq = request.Request('https://quanjing.com/', headers=headers)
# The context manager closes the HTTP response once the body has been read.
with request.urlopen(rq) as res:
    b = res.read()  # raw response body as bytes

# Show chardet's guess for the page encoding (useful when the page does not
# declare one via <meta charset="..." />).
detected = chardet.detect(b)
print(detected)

# The page is decoded as UTF-8 first. If that fails, fall back to the
# detected encoding instead of aborting the whole script.
try:
    s = b.decode('utf-8')
except UnicodeDecodeError:
    fallback = detected.get('encoding') or 'utf-8'
    # GB2312 content is normally decoded with the compatible superset
    # GBK/GB18030, so widen the codec to avoid spurious failures.
    if fallback.lower() in ('gb2312', 'gbk'):
        fallback = 'gb18030'
    s = b.decode(fallback, errors='replace')

# Restrict the search to the region between the two "主要内容" (main content)
# HTML comment markers; only images inside that region are downloaded.
area = re.findall(r'<!--\s*主要内容\s*-->.*?<!--\s*主要内容\s*-->', s, re.DOTALL)
if not area:
    # Same exception type as indexing an empty list, but with a message that
    # says what actually went wrong.
    raise IndexError('main-content markers not found in the downloaded page')
# findall returns a list; the first match is the region of interest.
s_area = area[0]

# Raw string so that \s is passed to the regex engine untouched instead of
# being treated as an (invalid) string escape sequence.
target = re.findall(r'<img\s+src="(.*?)"', s_area)

# Directory the pictures are written to. Raw string: the Windows path is full
# of backslashes that must not be interpreted as escape sequences.
save_dir = r'D:\Programming\Python\PlayTogether\进阶篇\class29_crawlerPic\pic_download'

# Download every image URL collected in `target` and store it on disk.
for i in target:
    # Resolve relative and protocol-relative ("//host/path") src values
    # against the page URL; absolute URLs pass through unchanged.
    pic_url = urljoin('https://quanjing.com/', i)
    # Send the same headers as the page request.
    pic_rq = request.Request(pic_url, headers=headers)
    with request.urlopen(pic_rq) as res02:
        pic_bytes = res02.read()
    # The file name is the last 10 characters of the src value; characters
    # that are not allowed in Windows file names (e.g. "/", "?", ":") are
    # replaced so that open() does not fail on them.
    pic_name = re.sub(r'[\\/:*?"<>|]', '_', i[-10:])
    with open(save_dir + '\\' + pic_name, 'wb') as f:
        f.write(pic_bytes)
print('Finished')